{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "用Python实现随机产生个个体的函数gen_individuals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gen_individuals(k, gen_num, input_data, featureIdx, nMax=10):\n",
    "    \"\"\"产生k个个体, gen_num表示每个体对应的固定基因数量\"\"\"\n",
    "    indiv_list = []\n",
    "    gene_list = []\n",
    "    for e in range(k):\n",
    "        indiv = {}\n",
    "        gene = []\n",
    "        for i in range(gen_num):\n",
    "            out = random_get_tree(input_data, featureIdx, nMax) \n",
    "            indiv[\"g\"+str(i+1)]=out['f_value']\n",
    "            gene.append(out['tree_exp'])\n",
    "        indiv = pd.DataFrame(indiv)\n",
    "        indiv_list.append(indiv)\n",
    "        gene_list.append(gene)\n",
    "    return {\"df\":indiv_list, \"gene\": gene_list}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def random_get_tree(input_data,featureIdx,nMax=10):\n",
    "    \"\"\" \n",
    "    从原始数据特征中，随机获取特征表达树  \n",
    "    featureIdx: 原始特征的下标数值，最小从1开始\n",
    "    nMax:一次最多从特征中可放回抽样次数，默认为10\n",
    "    \"\"\"\n",
    "    data = pd.DataFrame({\"X\"+str(e):input_data.iloc[:,(e-1)].values for e in featureIdx})\n",
    "    \n",
    "    # 随机抽取N个特征下标\n",
    "    N = random.choice(range(2,nMax+1))\n",
    "    \n",
    "    # 随机决定是使用满二叉树还是偏二叉树\n",
    "    if random.choice([0,1]) == 1:\n",
    "        # 选择满二叉树\n",
    "        select_feature_index = [random.choice(featureIdx) for i in range(N)]+[0]*int(2**np.ceil(np.log2(N)) - N)\n",
    "        random.shuffle(select_feature_index)\n",
    "        select_feature_index = ['data.X'+str(e)+\".values\" if e> 0 else '0' for e in select_feature_index]\n",
    "        tree_exp = gen_full_tree_exp(select_feature_index)\n",
    "    else:\n",
    "        # 选择偏二叉树\n",
    "        select_feature_index = ['data.X'+str(e)+\".values\" for e in [random.choice(featureIdx) for i in range(N)]]\n",
    "        tree_exp =  gen_side_tree_exp(select_feature_index)\n",
    "    return {\"f_value\":eval(tree_exp),\"tree_exp\":tree_exp.replace(\"data.\",\"\").replace(\".values\",\"\")}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#构建偏二叉树，并生成数学表达式\n",
    "def gen_side_tree_exp(var_flag_array):\n",
    "    if len(var_flag_array) == 1:\n",
    "        return add_one_group(var_flag_array[0])\n",
    "    else:\n",
    "        var_flag_array[1] = 'g('+random.choice(two_group)+','+add_one_group(var_flag_array[0])+','+add_one_group(var_flag_array[1])+')'\n",
    "        del var_flag_array[0]\n",
    "        return gen_side_tree_exp(var_flag_array)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 构建满二叉树，并生成数学表达式\n",
    "def gen_full_tree_exp(var_flag_array):\n",
    "    half_n = len(var_flag_array)//2\n",
    "    middle_array = []\n",
    "    for i in range(half_n):\n",
    "        if var_flag_array[i] == '0' and var_flag_array[i+half_n] != '0':\n",
    "            middle_array.append('g('+random.choice(one_group)+','+add_one_group(var_flag_array[i+half_n])+')')\n",
    "        elif var_flag_array[i] != '0' and var_flag_array[i+half_n] == '0':\n",
    "            middle_array.append('g('+random.choice(one_group)+','+add_one_group(var_flag_array[i])+')')\n",
    "        elif var_flag_array[i] != '0' and var_flag_array[i+half_n] != '0':\n",
    "            middle_array.append('g('+random.choice(two_group)+','+add_one_group(var_flag_array[i])+','+add_one_group(var_flag_array[i+half_n])+')')\n",
    "    if len(middle_array) == 1:\n",
    "        return add_one_group(middle_array[0])\n",
    "    else:\n",
    "        return gen_full_tree_exp(middle_array)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 随机增加一元运算符\n",
    "def add_one_group(feature_string, prob=0.3):\n",
    "    return 'g('+random.choice(one_group)+','+feature_string+')' if np.random.uniform(0, 1) < prob else feature_string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "min_number = 0.01\n",
    "\n",
    "# 一元运算\n",
    "def log(x):\n",
    "    return np.sign(x)*np.log2(np.abs(x)+1)\n",
    "\n",
    "def sqrt(x):\n",
    "    return np.sqrt(x-np.min(x)+min_number)\n",
    "\n",
    "def pow2(x):\n",
    "    return x**2\n",
    "\n",
    "def pow3(x):\n",
    "    return x**3\n",
    "\n",
    "def inv(x):\n",
    "    return 1*np.sign(x)/(np.abs(x)+min_number)\n",
    "\n",
    "def sigmoid(x):\n",
    "    if np.std(x) < min_number:\n",
    "        return x\n",
    "    x = (x - np.mean(x))/np.std(x)\n",
    "    return (1 + np.exp(-x))**(-1)\n",
    "\n",
    "def tanh(x):\n",
    "    if np.std(x) < min_number:\n",
    "        return x\n",
    "    x = (x - np.mean(x))/np.std(x)\n",
    "    return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))\n",
    "\n",
    "def relu(x):\n",
    "    if np.std(x) < min_number:\n",
    "        return x\n",
    "    x = (x - np.mean(x))/np.std(x)\n",
    "    return np.array([e if e > 0 else 0 for e in x])\n",
    "\n",
    "def binary(x):\n",
    "    if np.std(x) < min_number:\n",
    "        return x\n",
    "    x = (x - np.mean(x))/np.std(x)\n",
    "    return np.array([1 if e > 0 else 0 for e in x])\n",
    "\n",
    "# 二元运算\n",
    "def add(x,y):\n",
    "    return x + y\n",
    "\n",
    "def sub(x,y):\n",
    "    return x - y\n",
    "\n",
    "def times(x,y):\n",
    "    return x * y\n",
    "\n",
    "def div(x,y):\n",
    "    return x*np.sign(y)/(np.abs(y)+min_number)\n",
    "\n",
    "two_group = ['add', 'sub', 'times', 'div']\n",
    "one_group = ['log', 'sqrt', 'pow2', 'pow3', 'inv', 'sigmoid', 'tanh', 'relu', 'binary']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def g(f, a, b=None):\n",
    "    \"\"\"\n",
    "    f: 一元或二元运算函数\n",
    "    a: 第一个参数\n",
    "    b: 如果f是一元运算函数，则b为空，否则代表二元运算的第二个参数\n",
    "    \"\"\"\n",
    "    if b is None:\n",
    "        return f(a)\n",
    "    else:\n",
    "        return f(a,b)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用gen_individuals得到随机产生的种群数据和特征表达式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "____________________________________________\n",
      "    g1         g2         g3        g4\n",
      "0  1.2 -46.499130 -60.126805  5.528692\n",
      "1  1.2  17.889969 -60.479861  6.373917\n",
      "____________________________________________\n",
      "         g1        g2   g3        g4\n",
      "0  0.020907  1.032495  0.0 -4.758767\n",
      "1  0.020907  0.131816  0.0 -4.550616\n",
      "____________________________________________\n",
      "    g1        g2   g3        g4\n",
      "0 -0.4  1.946099  6.5  6.019048\n",
      "1 -1.4  1.946099  6.3  6.019048\n",
      "____________________________________________\n",
      "         g1        g2        g3   g4\n",
      "0 -2.804965  1.202852 -0.000304  2.8\n",
      "1 -2.375887  0.980971 -0.000000  2.8\n",
      "____________________________________________\n",
      "         g1         g2        g3        g4\n",
      "0  0.894949  43.589623  0.809632  0.934779\n",
      "1  0.729297  27.714623  4.277071  0.667053\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import random\n",
    "\n",
    "iris = pd.read_csv(\"http://image.cador.cn/data/iris.csv\")\n",
    "gen_out = gen_individuals(5,4,iris,[1,2,3,4])\n",
    "for x in gen_out['df']:\n",
    "    print(\"____________________________________________\")\n",
    "    print(x.head(2))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
